{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/lightgbm/__init__.py:48: UserWarning: Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_8.3.3) compiler.\n",
      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
      "  \"You can install the OpenMP library by the following command: ``brew install libomp``.\", UserWarning)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np  # linear algebra\n",
    "import pandas as pd  #\n",
    "from datetime import datetime\n",
    "from scipy.stats import skew  # for some statistics\n",
    "from scipy.special import boxcox1p\n",
    "from scipy.stats import boxcox_normmax\n",
    "from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.preprocessing import RobustScaler\n",
    "from sklearn.model_selection import KFold, cross_val_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from mlxtend.regressor import StackingCVRegressor\n",
    "from xgboost import XGBRegressor\n",
    "from lightgbm import LGBMRegressor\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy.stats import norm\n",
    "from scipy import stats\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the engineered train / test tables from the ./Output/csv directory.\n",
    "train_csv = './Output/csv/train_engineer.csv'\n",
    "test_csv = './Output/csv/test_engineer.csv'\n",
    "df_train, df_test = pd.read_csv(train_csv), pd.read_csv(test_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the label from the predictors: pop() returns the SalePrice column\n",
    "# and removes it from df_train in place.\n",
    "target = df_train.pop('SalePrice')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode the categorical columns of both splits.\n",
    "final_features = pd.get_dummies(df_train).reset_index(drop=True)\n",
    "final_test = pd.get_dummies(df_test).reset_index(drop=True)\n",
    "\n",
    "# The two frames are encoded independently, so a category present in only one split\n",
    "# yields a dummy column the other lacks. Put the test matrix on exactly the training\n",
    "# columns (same order): dummies missing from test become all-zero columns, and\n",
    "# test-only dummies the models were never trained on are dropped.\n",
    "final_test = final_test.reindex(columns=final_features.columns, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the columns in which one single value covers more than 99.94% of the rows.\n",
    "# NOTE(review): `overfit` is not referenced by any later cell shown in this notebook.\n",
    "n_rows = len(final_features)\n",
    "overfit = [\n",
    "    col for col in final_features.columns\n",
    "    # value_counts() sorts descending, so iloc[0] is the count of the most frequent value\n",
    "    if final_features[col].value_counts().iloc[0] / n_rows * 100 > 99.94\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 10-fold shuffled splitter shared by every cross-validated model below\n",
    "kfolds = KFold(n_splits=10, shuffle=True, random_state=42)\n",
    "\n",
    "\n",
    "def rmsle(y, y_pred):\n",
    "    \"\"\"Return the square root of the mean squared error between ``y`` and ``y_pred``.\n",
    "\n",
    "    NOTE(review): despite the name, no log is taken here; presumably the target is\n",
    "    already log-transformed upstream -- confirm.\n",
    "    \"\"\"\n",
    "    mse = mean_squared_error(y, y_pred)\n",
    "    return np.sqrt(mse)\n",
    "\n",
    "\n",
    "def cv_rmse(model, X=final_features):\n",
    "    \"\"\"Cross-validate ``model`` on ``X`` against the global ``target``.\n",
    "\n",
    "    Returns the array of per-fold RMSE values (one entry per ``kfolds`` split).\n",
    "    The default for ``X`` is bound to ``final_features`` when this cell is executed.\n",
    "    \"\"\"\n",
    "    neg_mse = cross_val_score(model, X, target, scoring=\"neg_mean_squared_error\", cv=kfolds)\n",
    "    return np.sqrt(-neg_mse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# setup models\n",
    "# all the parameters are based on the existing data processing\n",
    "alphas_alt = [14.5, 14.6, 14.7, 14.8, 14.9, 15, 15.1, 15.2, 15.3, 15.4, 15.5]\n",
    "alphas2 = [5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008]\n",
    "e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007]\n",
    "e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]\n",
    "\n",
    "# Linear models are wrapped in a pipeline so RobustScaler is fitted inside each CV fold.\n",
    "ridge = make_pipeline(RobustScaler(),\n",
    "                      RidgeCV(alphas=alphas_alt, cv=kfolds))\n",
    "\n",
    "# max_iter is an integer parameter: recent scikit-learn releases validate the type\n",
    "# and reject the float 1e7, so pass int(1e7).\n",
    "lasso = make_pipeline(RobustScaler(),\n",
    "                      LassoCV(max_iter=int(1e7), alphas=alphas2,\n",
    "                              random_state=42, cv=kfolds))\n",
    "\n",
    "elasticnet = make_pipeline(RobustScaler(),\n",
    "                           ElasticNetCV(max_iter=int(1e7), alphas=e_alphas,\n",
    "                                        cv=kfolds, l1_ratio=e_l1ratio))\n",
    "\n",
    "# NOTE: svr is scored on its own below but is not one of the stacked regressors.\n",
    "svr = make_pipeline(RobustScaler(),\n",
    "                      SVR(C= 20, epsilon= 0.008, gamma=0.0003,))\n",
    "\n",
    "\n",
    "gbr = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,\n",
    "                                   max_depth=4, max_features='sqrt',\n",
    "                                   min_samples_leaf=15, min_samples_split=10, \n",
    "                                   loss='huber', random_state =42)\n",
    "\n",
    "\n",
    "lightgbm = LGBMRegressor(objective='regression', \n",
    "                                       num_leaves=4,\n",
    "                                       learning_rate=0.01, \n",
    "                                       n_estimators=5000,\n",
    "                                       max_bin=200, \n",
    "                                       bagging_fraction=0.75,\n",
    "                                       bagging_freq=5, \n",
    "                                       bagging_seed=7,\n",
    "                                       feature_fraction=0.2,\n",
    "                                       feature_fraction_seed=7,\n",
    "                                       verbose=-1,\n",
    "                                       #min_data_in_leaf=2,\n",
    "                                       #min_sum_hessian_in_leaf=11\n",
    "                                       )\n",
    "\n",
    "\n",
    "# 'reg:linear' is deprecated in favor of 'reg:squarederror' (see the XGBoost warning:\n",
    "# \"reg:linear is now deprecated in favor of reg:squarederror\"); use the current name.\n",
    "xgboost = XGBRegressor(learning_rate=0.01, n_estimators=3460,\n",
    "                                     max_depth=3, min_child_weight=0,\n",
    "                                     gamma=0, subsample=0.7,\n",
    "                                     colsample_bytree=0.7,\n",
    "                                     objective='reg:squarederror', nthread=-1,\n",
    "                                     scale_pos_weight=1, seed=27,\n",
    "                                     reg_alpha=0.00006)\n",
    "\n",
    "# stack: six base regressors with xgboost as the meta-regressor; the original\n",
    "# features are also passed to the meta-regressor (use_features_in_secondary=True)\n",
    "stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet,\n",
    "                                            gbr, xgboost, lightgbm),\n",
    "                                meta_regressor=xgboost,\n",
    "                                use_features_in_secondary=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10409444557120766"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ridge: linear regression without feature importance.\n",
    "# Mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=ridge)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10324134187898312"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lasso: linear regression with no feature importance.\n",
    "# Mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=lasso)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10317876736244669"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Elastic net: linear model with no feature importance.\n",
    "# Mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=elasticnet)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10834618828263567"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SVR: mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=svr)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1302543918925113"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Random forest regression: mean RMSE over the CV folds.\n",
    "# RandomForestRegressor is imported with the other dependencies in the first cell,\n",
    "# so every import of the notebook lives in one place.\n",
    "forest_reg = RandomForestRegressor(n_estimators=40, random_state=42, \n",
    "                                   min_samples_leaf=3, max_features=0.5, n_jobs=-1, oob_score=True)\n",
    "score = cv_rmse(forest_reg)\n",
    "score.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10896411067233729"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# LightGBM: mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=lightgbm)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.11047061960146405"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gradient boosting (sklearn): mean RMSE over the CV folds.\n",
    "score = cv_rmse(model=gbr)\n",
    "np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10712744442462667"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# XGBoost: mean RMSE over the CV folds.\n",
    "# The saved stream output of this cell (one FutureWarning / deprecation-warning pair\n",
    "# per fold) was cleared; only the resulting score is kept.\n",
    "score = cv_rmse(xgboost)\n",
    "score.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[23:02:12] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
      "[23:02:48] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
      "[23:03:23] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
      "[23:03:59] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
     ]
    }
   ],
   "source": [
    "# Fit the stacked ensemble on the whole training set, passing plain NumPy arrays.\n",
    "X_full = np.array(final_features)\n",
    "y_full = np.array(target)\n",
    "stack_gen_model = stack_gen.fit(X_full, y_full)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refit the elastic-net pipeline on the full training set.\n",
    "# (The feature matrix is named final_features; `final_feature` does not exist.)\n",
    "elastic_model_full_data = elasticnet.fit(final_features, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refit the lasso pipeline on the full training set.\n",
    "# (The feature matrix is named final_features; `final_feature` does not exist.)\n",
    "lasso_model_full_data = lasso.fit(final_features, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refit the ridge pipeline on the full training set.\n",
    "# (The feature matrix is named final_features; `final_feature` does not exist.)\n",
    "ridge_model_full_data = ridge.fit(final_features, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['MSSubClass', 0.0016349228130816357],\n",
       " ['LotFrontage', 0.0044197817823763644],\n",
       " ['LotArea', 0.00984627093082312],\n",
       " ['OverallQual', 0.26344680026712536],\n",
       " ['OverallCond', 0.007246163088317686],\n",
       " ['YearBuilt', 0.01809525578190701],\n",
       " ['YearRemodAdd', 0.007076926324877024],\n",
       " ['MasVnrArea', 0.0015510122402282716],\n",
       " ['BsmtFinSF1', 0.007337847061278609],\n",
       " ['BsmtFinSF2', 0.00010372698966913396],\n",
       " ['BsmtUnfSF', 0.0036360503211098644],\n",
       " ['TotalBsmtSF', 0.009585222246531919],\n",
       " ['1stFlrSF', 0.00941193753631238],\n",
       " ['2ndFlrSF', 0.004045206365407001],\n",
       " ['LowQualFinSF', 0.0],\n",
       " ['GrLivArea', 0.06993176318406105],\n",
       " ['BsmtFullBath', 0.00036409804467955735],\n",
       " ['BsmtHalfBath', 1.069448660745847e-05],\n",
       " ['FullBath', 0.010353285574276427],\n",
       " ['HalfBath', 0.0005704731856375642],\n",
       " ['BedroomAbvGr', 0.0005774649064151042],\n",
       " ['KitchenAbvGr', 0.0004964665573416421],\n",
       " ['TotRmsAbvGrd', 0.0012318142797127039],\n",
       " ['Fireplaces', 0.0026283178502128812],\n",
       " ['GarageYrBlt', 0.005485398136837366],\n",
       " ['GarageCars', 0.01694323744033494],\n",
       " ['GarageArea', 0.015399951300523703],\n",
       " ['WoodDeckSF', 0.0014382959048520178],\n",
       " ['OpenPorchSF', 0.0021232600929585073],\n",
       " ['EnclosedPorch', 0.0005349042893319406],\n",
       " ['3SsnPorch', 0.0],\n",
       " ['ScreenPorch', 0.00015855273981577763],\n",
       " ['PoolArea', 0.0],\n",
       " ['MiscVal', 3.494186666053486e-05],\n",
       " ['MoSold', 0.002063652722492988],\n",
       " ['YrSold', 0.001297412356459988],\n",
       " ['YrBltAndRemod', 0.039733671446982095],\n",
       " ['TotalSF', 0.2747224998738433],\n",
       " ['Total_sqr_footage', 0.09936963272150862],\n",
       " ['Total_Bathrooms', 0.014572418401372324],\n",
       " ['Total_porch_sf', 0.004140851796006113],\n",
       " ['haspool', 0.0],\n",
       " ['has2ndfloor', 5.24462924721548e-05],\n",
       " ['hasgarage', 0.0015291362666445037],\n",
       " ['hasbsmt', 0.0],\n",
       " ['hasfireplace', 0.0038761739119291183],\n",
       " ['MSZoning_C (all)', 0.0007548831738540757],\n",
       " ['MSZoning_FV', 0.0],\n",
       " ['MSZoning_RH', 0.0],\n",
       " ['MSZoning_RL', 0.0006713738945588191],\n",
       " ['MSZoning_RM', 0.0022640251541330265],\n",
       " ['Street_Grvl', 0.0],\n",
       " ['Street_Pave', 0.0],\n",
       " ['Alley_Grvl', 0.0],\n",
       " ['Alley_None', 0.00045672071886093896],\n",
       " ['Alley_Pave', 0.0001184291138225484],\n",
       " ['LotShape_IR1', 0.00048177413007633336],\n",
       " ['LotShape_IR2', 6.921196674037882e-06],\n",
       " ['LotShape_IR3', 0.0],\n",
       " ['LotShape_Reg', 0.0009499396569714851],\n",
       " ['LandContour_Bnk', 9.603525730160198e-05],\n",
       " ['LandContour_HLS', 0.0],\n",
       " ['LandContour_Low', 0.0],\n",
       " ['LandContour_Lvl', 0.0001975509971096705],\n",
       " ['LotConfig_Corner', 0.0002377541310976615],\n",
       " ['LotConfig_CulDSac', 6.173017549262177e-05],\n",
       " ['LotConfig_FR2', 1.0525192803949828e-05],\n",
       " ['LotConfig_FR3', 0.0],\n",
       " ['LotConfig_Inside', 0.0003297667920319404],\n",
       " ['LandSlope_Gtl', 0.0003306906667899144],\n",
       " ['LandSlope_Mod', 3.709405471344352e-05],\n",
       " ['LandSlope_Sev', 0.0],\n",
       " ['Neighborhood_Blmngtn', 0.0],\n",
       " ['Neighborhood_Blueste', 0.0],\n",
       " ['Neighborhood_BrDale', 1.2396580965291933e-05],\n",
       " ['Neighborhood_BrkSide', 0.0001387529147960985],\n",
       " ['Neighborhood_ClearCr', 0.0],\n",
       " ['Neighborhood_CollgCr', 0.00010852271542238793],\n",
       " ['Neighborhood_Crawfor', 0.000549271393459874],\n",
       " ['Neighborhood_Edwards', 0.0004378928934274863],\n",
       " ['Neighborhood_Gilbert', 1.430689673501348e-05],\n",
       " ['Neighborhood_IDOTRR', 0.0005265816980060657],\n",
       " ['Neighborhood_MeadowV', 0.0],\n",
       " ['Neighborhood_Mitchel', 1.1767782007978048e-05],\n",
       " ['Neighborhood_NAmes', 0.0004110194873757379],\n",
       " ['Neighborhood_NPkVill', 7.594379020786152e-06],\n",
       " ['Neighborhood_NWAmes', 0.00011937345976469354],\n",
       " ['Neighborhood_NoRidge', 9.359126091376074e-05],\n",
       " ['Neighborhood_NridgHt', 0.00010213253599788117],\n",
       " ['Neighborhood_OldTown', 0.0013925558315817839],\n",
       " ['Neighborhood_SWISU', 0.0],\n",
       " ['Neighborhood_Sawyer', 4.838626222560948e-05],\n",
       " ['Neighborhood_SawyerW', 2.5454726348841215e-05],\n",
       " ['Neighborhood_Somerst', 8.184206097630593e-05],\n",
       " ['Neighborhood_StoneBr', 0.0],\n",
       " ['Neighborhood_Timber', 0.0],\n",
       " ['Neighborhood_Veenker', 0.0],\n",
       " ['Condition1_Artery', 9.138928820132466e-05],\n",
       " ['Condition1_Feedr', 4.539069309392972e-05],\n",
       " ['Condition1_Norm', 0.00029692338376726806],\n",
       " ['Condition1_PosA', 0.0],\n",
       " ['Condition1_PosN', 7.018426436101594e-05],\n",
       " ['Condition1_RRAe', 1.3420442372586157e-05],\n",
       " ['Condition1_RRAn', 1.035896022238444e-05],\n",
       " ['Condition1_RRNe', 0.0],\n",
       " ['Condition1_RRNn', 0.0],\n",
       " ['Condition2_Artery', 0.0],\n",
       " ['Condition2_Feedr', 0.0],\n",
       " ['Condition2_Norm', 0.0],\n",
       " ['Condition2_PosA', 0.0],\n",
       " ['Condition2_PosN', 0.0],\n",
       " ['Condition2_RRAe', 0.0],\n",
       " ['Condition2_RRAn', 0.0],\n",
       " ['Condition2_RRNn', 0.0],\n",
       " ['BldgType_1Fam', 6.0922562425073064e-05],\n",
       " ['BldgType_2fmCon', 1.743632765590969e-05],\n",
       " ['BldgType_Duplex', 0.00039131391882178527],\n",
       " ['BldgType_Twnhs', 2.107143968124046e-06],\n",
       " ['BldgType_TwnhsE', 1.2988450538708071e-05],\n",
       " ['HouseStyle_1.5Fin', 0.00023800429877477035],\n",
       " ['HouseStyle_1.5Unf', 0.0],\n",
       " ['HouseStyle_1Story', 0.00025521170681840375],\n",
       " ['HouseStyle_2.5Fin', 0.0],\n",
       " ['HouseStyle_2.5Unf', 0.0],\n",
       " ['HouseStyle_2Story', 0.0001857093328235202],\n",
       " ['HouseStyle_SFoyer', 5.625638242644576e-07],\n",
       " ['HouseStyle_SLvl', 8.441087115764982e-05],\n",
       " ['RoofStyle_Flat', 5.9095730107823106e-05],\n",
       " ['RoofStyle_Gable', 0.0002640208575191781],\n",
       " ['RoofStyle_Gambrel', 0.0],\n",
       " ['RoofStyle_Hip', 0.00041647710016044754],\n",
       " ['RoofStyle_Mansard', 0.0],\n",
       " ['RoofStyle_Shed', 0.0],\n",
       " ['RoofMatl_CompShg', 7.997058213537297e-05],\n",
       " ['RoofMatl_Membran', 0.0],\n",
       " ['RoofMatl_Metal', 0.0],\n",
       " ['RoofMatl_Roll', 0.0],\n",
       " ['RoofMatl_Tar&Grv', 0.0],\n",
       " ['RoofMatl_WdShake', 0.0],\n",
       " ['RoofMatl_WdShngl', 0.0],\n",
       " ['Exterior1st_AsbShng', 0.0],\n",
       " ['Exterior1st_AsphShn', 0.0],\n",
       " ['Exterior1st_BrkComm', 0.0],\n",
       " ['Exterior1st_BrkFace', 4.935202823743488e-05],\n",
       " ['Exterior1st_CBlock', 0.0],\n",
       " ['Exterior1st_CemntBd', 4.7251730559104286e-05],\n",
       " ['Exterior1st_HdBoard', 6.664227879507667e-05],\n",
       " ['Exterior1st_ImStucc', 0.0],\n",
       " ['Exterior1st_MetalSd', 8.091023005313409e-05],\n",
       " ['Exterior1st_Plywood', 7.8631371201449e-05],\n",
       " ['Exterior1st_Stone', 0.0],\n",
       " ['Exterior1st_Stucco', 0.0],\n",
       " ['Exterior1st_VinylSd', 0.00031028047647115264],\n",
       " ['Exterior1st_Wd Sdng', 0.0002991654413045409],\n",
       " ['Exterior1st_WdShing', 0.0],\n",
       " ['Exterior2nd_AsbShng', 0.0],\n",
       " ['Exterior2nd_AsphShn', 0.0],\n",
       " ['Exterior2nd_Brk Cmn', 0.0],\n",
       " ['Exterior2nd_BrkFace', 0.0],\n",
       " ['Exterior2nd_CBlock', 0.0],\n",
       " ['Exterior2nd_CmentBd', 7.071132338967738e-05],\n",
       " ['Exterior2nd_HdBoard', 0.00012237857732761258],\n",
       " ['Exterior2nd_ImStucc', 0.0],\n",
       " ['Exterior2nd_MetalSd', 0.00028583821186687686],\n",
       " ['Exterior2nd_Other', 0.0],\n",
       " ['Exterior2nd_Plywood', 0.0002443709333386571],\n",
       " ['Exterior2nd_Stone', 0.0],\n",
       " ['Exterior2nd_Stucco', 0.0],\n",
       " ['Exterior2nd_VinylSd', 0.00028641126854992154],\n",
       " ['Exterior2nd_Wd Sdng', 0.00021838315542556201],\n",
       " ['Exterior2nd_Wd Shng', 0.0],\n",
       " ['MasVnrType_BrkCmn', 0.0],\n",
       " ['MasVnrType_BrkFace', 0.00030306647677730373],\n",
       " ['MasVnrType_None', 0.00021608689176656206],\n",
       " ['MasVnrType_Stone', 0.00018019618644417488],\n",
       " ['ExterQual_Ex', 0.0004949847883755421],\n",
       " ['ExterQual_Fa', 9.938328532161108e-05],\n",
       " ['ExterQual_Gd', 0.0005627358653655922],\n",
       " ['ExterQual_TA', 0.02228747196433917],\n",
       " ['ExterCond_Ex', 0.0],\n",
       " ['ExterCond_Fa', 0.0008194285129856355],\n",
       " ['ExterCond_Gd', 0.00018241950698572482],\n",
       " ['ExterCond_Po', 0.0],\n",
       " ['ExterCond_TA', 0.0003859556566477381],\n",
       " ['Foundation_BrkTil', 0.00010302848984900204],\n",
       " ['Foundation_CBlock', 0.0002525666640736909],\n",
       " ['Foundation_PConc', 0.00022295203987015313],\n",
       " ['Foundation_Slab', 1.5995879875601694e-06],\n",
       " ['Foundation_Stone', 0.0],\n",
       " ['Foundation_Wood', 0.0],\n",
       " ['BsmtQual_Ex', 0.0024110695622250795],\n",
       " ['BsmtQual_Fa', 0.0],\n",
       " ['BsmtQual_Gd', 0.0011592707360807153],\n",
       " ['BsmtQual_None', 0.0],\n",
       " ['BsmtQual_TA', 0.0011333230879239232],\n",
       " ['BsmtCond_Fa', 2.745431539581825e-05],\n",
       " ['BsmtCond_Gd', 2.23825953529931e-06],\n",
       " ['BsmtCond_None', 0.00019152366740563244],\n",
       " ['BsmtCond_Po', 0.0],\n",
       " ['BsmtCond_TA', 0.00028936296215037027],\n",
       " ['BsmtExposure_Av', 9.944407748135669e-05],\n",
       " ['BsmtExposure_Gd', 0.0004305269510052505],\n",
       " ['BsmtExposure_Mn', 5.897623761276758e-05],\n",
       " ['BsmtExposure_No', 0.0006077394874965026],\n",
       " ['BsmtExposure_None', 1.6885293410913015e-06],\n",
       " ['BsmtFinType1_ALQ', 0.00025703977287874666],\n",
       " ['BsmtFinType1_BLQ', 8.654726724710176e-05],\n",
       " ['BsmtFinType1_GLQ', 0.00040290938756455647],\n",
       " ['BsmtFinType1_LwQ', 8.503357354549965e-05],\n",
       " ['BsmtFinType1_None', 0.0],\n",
       " ['BsmtFinType1_Rec', 0.00011350470089615653],\n",
       " ['BsmtFinType1_Unf', 0.0001832037147562502],\n",
       " ['BsmtFinType2_ALQ', 0.0],\n",
       " ['BsmtFinType2_BLQ', 0.0],\n",
       " ['BsmtFinType2_GLQ', 0.0],\n",
       " ['BsmtFinType2_LwQ', 2.3887143616957046e-06],\n",
       " ['BsmtFinType2_None', 7.079526157525028e-06],\n",
       " ['BsmtFinType2_Rec', 3.562139355136021e-05],\n",
       " ['BsmtFinType2_Unf', 6.363180630320206e-05],\n",
       " ['Heating_Floor', 0.0],\n",
       " ['Heating_GasA', 0.0],\n",
       " ['Heating_GasW', 0.0],\n",
       " ['Heating_Grav', 0.0],\n",
       " ['Heating_OthW', 0.0],\n",
       " ['Heating_Wall', 0.0],\n",
       " ['HeatingQC_Ex', 0.00023332185174775727],\n",
       " ['HeatingQC_Fa', 0.0002854731188228166],\n",
       " ['HeatingQC_Gd', 0.00016202576393215253],\n",
       " ['HeatingQC_Po', 0.0],\n",
       " ['HeatingQC_TA', 0.0005500494964891321],\n",
       " ['CentralAir_N', 0.008070986094111905],\n",
       " ['CentralAir_Y', 0.002644155326434854],\n",
       " ['Electrical_FuseA', 0.000280066516304638],\n",
       " ['Electrical_FuseF', 3.44110393026633e-05],\n",
       " ['Electrical_FuseP', 0.0],\n",
       " ['Electrical_Mix', 0.0],\n",
       " ['Electrical_SBrkr', 4.361701725026348e-05],\n",
       " ['KitchenQual_Ex', 0.001531833546405001],\n",
       " ['KitchenQual_Fa', 0.00014764933761584755],\n",
       " ['KitchenQual_Gd', 0.0018005092990486934],\n",
       " ['KitchenQual_TA', 0.00276519219575731],\n",
       " ['Functional_Maj1', 0.00018938718323159746],\n",
       " ['Functional_Maj2', 0.0],\n",
       " ['Functional_Min1', 0.0],\n",
       " ['Functional_Min2', 1.2395749114749898e-05],\n",
       " ['Functional_Mod', 0.0],\n",
       " ['Functional_Sev', 0.0],\n",
       " ['Functional_Typ', 0.0005538614991524505],\n",
       " ['FireplaceQu_Ex', 1.887346215443348e-06],\n",
       " ['FireplaceQu_Fa', 0.0],\n",
       " ['FireplaceQu_Gd', 0.0011329476593016125],\n",
       " ['FireplaceQu_None', 0.0037953542030668063],\n",
       " ['FireplaceQu_Po', 0.0],\n",
       " ['FireplaceQu_TA', 0.00015617326839915087],\n",
       " ['GarageType_2Types', 0.0],\n",
       " ['GarageType_Attchd', 0.0008839195164336561],\n",
       " ['GarageType_Basment', 0.0],\n",
       " ['GarageType_BuiltIn', 9.17692928649254e-05],\n",
       " ['GarageType_CarPort', 6.595583304104008e-05],\n",
       " ['GarageType_Detchd', 0.0009504745554202218],\n",
       " ['GarageType_None', 5.241983340597303e-05],\n",
       " ['GarageFinish_Fin', 0.00027809438099887947],\n",
       " ['GarageFinish_None', 0.0],\n",
       " ['GarageFinish_RFn', 0.00022068332086107344],\n",
       " ['GarageFinish_Unf', 0.0007105300223852884],\n",
       " ['GarageQual_Ex', 0.0],\n",
       " ['GarageQual_Fa', 0.00015201129265569687],\n",
       " ['GarageQual_Gd', 0.0],\n",
       " ['GarageQual_None', 0.00013559632085941272],\n",
       " ['GarageQual_Po', 0.0],\n",
       " ['GarageQual_TA', 0.00014304940394318992],\n",
       " ['GarageCond_Ex', 0.0],\n",
       " ['GarageCond_Fa', 6.127478881141344e-05],\n",
       " ['GarageCond_Gd', 0.0],\n",
       " ['GarageCond_None', 0.00040358575502377053],\n",
       " ['GarageCond_Po', 0.0],\n",
       " ['GarageCond_TA', 0.00027300213255452595],\n",
       " ['PavedDrive_N', 0.0004507983995075601],\n",
       " ['PavedDrive_P', 2.0116598545284207e-05],\n",
       " ['PavedDrive_Y', 0.00014767060257347023],\n",
       " ['PoolQC_Ex', 0.0],\n",
       " ['PoolQC_Fa', 0.0],\n",
       " ['PoolQC_Gd', 0.0],\n",
       " ['PoolQC_None', 0.0],\n",
       " ['Fence_GdPrv', 9.029899274849935e-06],\n",
       " ['Fence_GdWo', 0.00020006121884887936],\n",
       " ['Fence_MnPrv', 1.853641878641855e-05],\n",
       " ['Fence_MnWw', 0.0],\n",
       " ['Fence_None', 0.00010862304283872541],\n",
       " ['MiscFeature_Gar2', 0.0],\n",
       " ['MiscFeature_None', 4.231188037549443e-06],\n",
       " ['MiscFeature_Othr', 0.0],\n",
       " ['MiscFeature_Shed', 0.0],\n",
       " ['MiscFeature_TenC', 0.0],\n",
       " ['SaleType_COD', 0.0],\n",
       " ['SaleType_CWD', 0.0],\n",
       " ['SaleType_Con', 0.0],\n",
       " ['SaleType_ConLD', 0.0],\n",
       " ['SaleType_ConLI', 0.0],\n",
       " ['SaleType_ConLw', 0.0],\n",
       " ['SaleType_New', 9.597872887171592e-05],\n",
       " ['SaleType_Oth', 0.0],\n",
       " ['SaleType_WD', 0.00029101276502153413],\n",
       " ['SaleCondition_Abnorml', 0.001232445454242609],\n",
       " ['SaleCondition_AdjLand', 0.0],\n",
       " ['SaleCondition_Alloca', 0.0],\n",
       " ['SaleCondition_Family', 6.472997645120376e-05],\n",
       " ['SaleCondition_Normal', 0.0015009383135730062],\n",
       " ['SaleCondition_Partial', 0.00035988771515465867]]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "init_feature.values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot feature importance\n",
    "def plot_feature_importance(model, df, top_n=30):\n",
    "    \"\"\"Draw a horizontal bar chart of the ``top_n`` most important features.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : fitted estimator exposing ``feature_importances_``\n",
    "    df : DataFrame whose columns line up, in order, with ``model.feature_importances_``\n",
    "    top_n : int, default 30\n",
    "        How many of the highest-ranked features to draw.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns ``columns_name`` / ``feature_importance`` covering every column of ``df``\n",
    "        (raw importances, not only the plotted ones).\n",
    "    \"\"\"\n",
    "    importances = np.asarray(model.feature_importances_)\n",
    "    # Rank across *all* features before truncating: slicing the raw array first\n",
    "    # (``[:30]``) would only ever show the first 30 columns of df, whatever their rank.\n",
    "    ranked = np.argsort(importances)\n",
    "    top_idx = ranked[max(len(ranked) - top_n, 0):]\n",
    "    # express each bar as a percentage of the largest importance;\n",
    "    # skip the division when every importance is zero\n",
    "    peak = importances.max()\n",
    "    relative = (100.0 * importances[top_idx] / peak) if peak > 0 else importances[top_idx]\n",
    "    pos = np.arange(top_idx.shape[0]) + .5\n",
    "    plt.figure(figsize=(20, 20))  # figure size\n",
    "    plt.barh(pos, relative, align='center')\n",
    "    # label the bars with the feature names taken from df\n",
    "    plt.yticks(pos, df.columns[top_idx], fontsize=15)\n",
    "    plt.xlabel('Relative Importance', fontsize=20)\n",
    "    plt.ylabel('Features', fontsize=20)\n",
    "    plt.title('Variable Importance', fontsize=30)\n",
    "    return pd.DataFrame({'columns_name': df.columns, 'feature_importance': model.feature_importances_})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
